%env PROJ_LIB=/home/pavel/develop/anaconda2/envs/ds/share/proj/
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
grouped = pd.read_csv('../grouped_regions_trips_count.csv', parse_dates=['time'])
grouped.head()
region_trips_count = grouped.groupby(['region'])['trips_count'].sum()
print 'Число ячеек без поездок: %s' % sum(region_trips_count == 0)
lat_min, lat_max = 40.49612, 40.91553
long_min, long_max = -74.25559, -73.70001
empire_lat, empire_long = 40.748817, -73.985428
# EPSG код взят отсюда: http://spatialreference.org/ref/epsg/?search=new+york+&srtext=Search
NYC_EPSG_CODE = 2261
def get_lat_space(num):
return np.linspace(lat_min, lat_max, num + 1)
def get_long_space(num):
return np.linspace(long_min, long_max, num + 1)
%%time
fig = plt.figure(figsize=(15,15))
m = Basemap(llcrnrlat=lat_min, llcrnrlon=long_min,
urcrnrlat=lat_max, urcrnrlon=long_max,
epsg=NYC_EPSG_CODE)
m.arcgisimage(xpixels=2000, service='NatGeo_World_Map')
empire_x, empire_y = m(empire_long, empire_lat)
m.plot(empire_x, empire_y, 'ro', markersize=10)
plt.text(empire_x - 5000, empire_y - 1000, 'Empire State Building', color='black')
plt.show()
lat_space = get_lat_space(50)
long_space = get_long_space(50)
# scaled_data = np.log(region_trips_count + 1)
scaled_data = region_trips_count + 1
data = scaled_data.values.reshape((50, 50)).T
from matplotlib.colors import LogNorm
def plot_static_map_with_colormesh(lat_space, long_space, mesh_data, cmap='hot', xpixels=2000):
fig = plt.figure(figsize=(17,15))
m = Basemap(llcrnrlat=lat_min, llcrnrlon=long_min,
urcrnrlat=lat_max, urcrnrlon=long_max,
epsg=NYC_EPSG_CODE)
m.arcgisimage(xpixels=xpixels, service='World_Topo_Map')
m.pcolormesh(long_space, lat_space, mesh_data,
norm=LogNorm(vmin=1, vmax=np.max(mesh_data)),
latlon=True, alpha=0.5, cmap=cmap)
empire_x, empire_y = m(empire_long, empire_lat)
m.plot(empire_x, empire_y, 'ro', markersize=10)
plt.text(empire_x - 5000, empire_y - 1000, 'Empire State Building', color='black')
plt.colorbar(orientation='vertical', shrink=0.85, label='Trips Count')
plt.show()
%%time
plot_static_map_with_colormesh(lat_space, long_space, data, xpixels=2000, cmap='YlOrRd')
Для большей наглядности выведем ту же карту, но с разбиением ячеек не 50x50, а 200x200. Данные для этого были посчитаны в ноутбуке первой недели и сохранены в файл.
%%time
grouped_hires = pd.read_csv('../grouped_regions_trips_count_hires.csv')
data_hires = grouped_hires.groupby(['region'])['trips_count'].sum()
data_hires.shape
scaled_data_hires = data_hires + 1
data_hires = scaled_data_hires.values.reshape((200, 200)).T
%%time
plot_static_map_with_colormesh(get_lat_space(200),
get_long_space(200),
data_hires, cmap='hot')
import folium
from IPython.core.display import display, HTML
statue_lat, statue_long = 40.689247, -74.044502
map_center = [np.mean([lat_min, lat_max]), np.mean([long_min, long_max])]
def show_map(folium_map):
display(HTML(folium_map._repr_html_().replace('padding-bottom:60%', 'padding-bottom:85%')))
m = folium.Map(location=map_center,
width=800, height=800,
zoom_start=11)
folium.Marker([statue_lat, statue_long], popup='Statue of Liberty').add_to(m)
show_map(m)
mean_hour_trips_count = grouped.groupby(['region'])['trips_count'].mean()
from matplotlib.colors import Normalize, LogNorm, rgb2hex
from matplotlib.cm import ScalarMappable
def plot_folium_map_with_colormesh(lat_space, long_space, density, cmap='hot', fill_opacity=0.5,
cell_filter_func=lambda x: True):
m = folium.Map(location=map_center,
width=800, height=800,
zoom_start=11)
density = density + 1
norm = LogNorm(vmin=np.min(density),
vmax=np.max(density))
mapper = ScalarMappable(norm=norm, cmap=cmap)
for i, lat_low in enumerate(lat_space[:-1]):
for j, lon_low in enumerate(long_space[:-1]):
if not cell_filter_func(density[i][j]):
continue
color = rgb2hex(mapper.to_rgba(density[i][j])[:3])
folium.Rectangle(bounds=((lat_low, lon_low),
(lat_space[i+1], long_space[j+1])),
color=color,
opacity=0.3,
weight=1,
fill_opacity=fill_opacity,
fill_color=color).add_to(m)
show_map(m)
%%time
plot_folium_map_with_colormesh(get_lat_space(50),
get_long_space(50),
mean_hour_trips_count.values.reshape((50, 50)).T,
cmap='YlOrRd')
len(mean_hour_trips_count[mean_hour_trips_count >= 5])
%%time
plot_folium_map_with_colormesh(get_lat_space(50),
get_long_space(50),
mean_hour_trips_count.values.reshape((50, 50)).T,
cell_filter_func=lambda x: x >= 5,
fill_opacity=0.8,
cmap='YlOrRd')
На карте мы видим, что все заполненные ячейки (т.е. со средним количеством поездок не менее 5) расположены на суше, т.е. поездки в них не являются невозможными.